import os
# Directory that get_all_lyric() lists when looking for lyric files.
# NOTE(review): this name shadows the built-in dir() for the whole module;
# it is kept as-is because get_all_lyric() references it by this name.
dir = r'D:\KwDownload\song'
def read_lrc(file):
    """Extract the lyric text lines from a GBK-encoded lyric file.

    Each line is stripped; lines without a ']' are ignored. Everything up to
    and including the first ']' is dropped, and the remainder is kept only if
    it is non-empty and contains neither '许嵩' nor '词：'. Spaces in a kept
    line are replaced with ',' and a trailing '。' is appended.

    Args:
        file: Path of the lyric file to read.

    Returns:
        A list of processed lyric strings in file order, or an empty list
        when the file cannot be opened or is not valid GBK text.
    """
    try:
        # The context manager closes the handle even when decoding fails.
        with open(file, mode='r', encoding='gbk') as reader:
            content = reader.readlines()
    except (OSError, ValueError):
        # Best-effort: unreadable files and non-GBK content
        # (UnicodeDecodeError is a ValueError subclass) yield no lyrics.
        return []
    datas = []
    for line in content:
        line = line.strip()
        if ']' not in line:
            continue
        # Drop the leading "[...]" tag; only the first ']' is considered.
        line = line[line.index(']') + 1:]
        if line and '许嵩' not in line and '词：' not in line:
            datas.append(line.replace(' ', ',') + '。')
    return datas
def get_all_lyric(lyric_dir=None):
    """Read every ``.lrc`` file in a directory into a list of songs.

    Prints the name of each lyric file as it is processed.

    Args:
        lyric_dir: Directory to scan. When omitted, the module-level ``dir``
            path is used.

    Returns:
        A list with one entry per lyric file, each entry being the list of
        lines returned by ``read_lrc`` for that file (possibly empty).
    """
    if lyric_dir is None:
        lyric_dir = dir
    songs = []
    for file in os.listdir(lyric_dir):
        # Compare the extension rather than searching for the substring
        # 'lrc', so unrelated names that merely contain "lrc" are skipped.
        if os.path.splitext(file)[1].lower() != '.lrc':
            continue
        print(file)
        songs.append(read_lrc(os.path.join(lyric_dir, file)))
    return songs

def get_lyric_seq2seq_data():
    """Build (line, next_line) pairs from consecutive lyric lines.

    Pairs never span two songs: the last line of a song has no successor
    and therefore produces no pair.

    Returns:
        A list of 2-tuples ``(line, following_line)`` collected over every
        song returned by ``get_all_lyric()``.
    """
    pair = []
    for song in get_all_lyric():
        # zip stops at the shorter sequence, so the final line of each song
        # is only ever used as the second element of a pair.
        pair.extend(zip(song, song[1:]))
    return pair

